1 Tidycensus Capabilities
Check what variables are available from the 5 year American Community Survey in 2019.
all_2019_variables <- load_variables(2019, "acs5")Obtain data such as median home value easily:
1.1 County level data
1.2 Using mapview
2 Getting Educational Data
2.1 The Actual Educational Data
Digging into 2016-2019 census data by using each one, this takes significant time on the census website learning the codes and API formatting.
get_historic_acs <- function(variables,
geography,
year,
summary_var = NULL) {
y <- list()
for (i in 1:length(year)) {
y[[i]] <- lapply(geography, function (x) {
tidycensus::get_acs (geography = x,
variables = variables,
summary_var = summary_var,
output = "tidy",
year = year[i])}) %>%
bind_rows() %>%
mutate(year = year[i]) }
y %>% bind_rows() } # Function to get all data instead of just most recent
year_range <- 2016:2019
geos_inc <- c("county")
ed_variable <- c('DP02_0059P',
'DP02_0060P',
'DP02_0061P',
'DP02_0062P',
'DP02_0063P',
'DP02_0064P',
'DP02_0065P')
ed_labels <- c('Less than 9th Grade',
'9th to 12th grade, no diploma',
'High school graduate',
'Some college, no degree',
"Associate's degree",
"Bachelor's degree",
'Grad/pro degree')
ed_level <- c(1:7)
ed_table <- as_tibble(bind_cols(ed_variable, ed_level, ed_labels), stringsAsFactors = FALSE)
ed_data <- get_historic_acs(variables = ed_variable,
geography = geos_inc,
year = 2018)
ed_data <- ed_data %>%
mutate(variable = case_when(variable == "DP02_0059P" ~ 'Less than 9th Grade',
variable == "DP02_0060P" ~ '9th to 12th grade, no diploma',
variable == "DP02_0061P" ~ 'High school graduate',
variable == "DP02_0062P" ~ 'Some college, no degree',
variable == "DP02_0063P" ~ 'Associate\'s degree',
variable == "DP02_0064P" ~ 'Bachelor\'s degree',
variable == "DP02_0065P" ~ 'Grad/pro degree'))
ed_data_2 <- get_acs(geography = "county",
variables = ed_variable,
year = 2018,
geometry = TRUE,
summary_var = "DP02_0059P",
shift_geo = T)
ed_data_2 <- ed_data_2 %>%
mutate(variable = case_when(variable == "DP02_0059P" ~ 'Less than 9th Grade',
variable == "DP02_0060P" ~ '9th to 12th grade, no diploma',
variable == "DP02_0061P" ~ 'High school graduate',
variable == "DP02_0062P" ~ 'Some college, no degree',
variable == "DP02_0063P" ~ 'Associate\'s degree',
variable == "DP02_0064P" ~ 'Bachelor\'s degree',
variable == "DP02_0065P" ~ 'Grad/pro degree'))2.2 Secondary Data for Plotting and Comparison
variable <-c("DP02_0011P",
"DP02_0067P",
"DP02_0069P",
"DP02_0090P",
"DP02_0092P",
"DP02_0111P",
"DP03_0005P",
"DP03_0021P",
"DP03_0028P",
"DP03_0088",
"DP03_0096P",
"DP03_0128P")
label <- c("%Householders living alone",
"%Bachelor's degree or higher",
"%Civilian veterans",
"%Born different state",
"%Foreign born",
"%Speak English only @ home",
"%Civilian LF - Unemployed",
"%Public trans to work",
"%Service occupations",
"$Per capita income",
"%Health insurance",
"%Below FPL - All people")
dp_table <- as.data.frame(cbind(variable, label))
dp_data <- get_historic_acs(variables=variable,
geography = geos_inc,
year = year_range) # REMINDER TO GGANIMATE LAST 5 YEARS2.3 Another way to get race data
# race_variables <- c("B02001_001",
# "B02001_002",
# "B02001_003",
# "B02001_004",
# "B02001_005")
#
# race_labels <- c("Total",
# "White",
# "Black",
# "American Indian",
# "Asian")
#
# race_table <- as_tibble(bind_cols(race_variables, race_labels))
#
# race_data <- get_historic_acs(variables = race_variables,
# geography = geos_inc,
# year = year_range)
#
# race_data <- race_data %>% mutate()
# Easier way to do it
race_data2 <- get_acs(geography = "county",
variables = racevars,
year = 2018,
geometry = TRUE,
summary_var = "B02001_001",
shift_geo = T)## Getting data from the 2014-2018 5-year ACS
## Using feature geometry obtained from the albersusa package
## Please note: Alaska and Hawaii are being shifted and are not to scale.
2.4 Joining the data
# ed_data <- ed_data %>%
# left_join(ed_table)
ed_data_join <- ed_data %>% select(variable, GEOID, NAME, estimate) %>% rename(education_level = variable)
race_data_join <- race_data2 %>% mutate(estimate_percent = estimate/summary_est) %>% select(GEOID, NAME, variable, estimate_percent, geometry) %>% rename(race = variable)
test <- ed_data_join %>% left_join(race_data_join, by = c("GEOID", "NAME"))2.5 A plot of educational attainment that should only be run on specific counties
2.6 Map of the racial data
race_data_join %>%
mutate(Percent = 100 * (estimate_percent)) %>%
ggplot(aes(fill = Percent, color = Percent)) +
facet_wrap(~ race) +
geom_sf() +
scale_fill_viridis_c(direction = -1) +
scale_color_viridis_c(direction = -1) +
labs(title = "Racial geography of the US",
caption = "Source: American Community Survey") +
theme_void()2.7 Map of the Educational Data
# just_geometry <- race_data_join %>% select(geometry, GEOID)
# ed_data_join <- ed_data_join %>% left_join(just_geometry, by = "GEOID")
ed_data_2 %>%
ggplot(aes(fill = estimate, color = estimate)) +
facet_wrap(~ factor(variable, levels = c('Less than 9th Grade',
'9th to 12th grade, no diploma',
'High school graduate',
'Some college, no degree',
"Associate's degree",
"Bachelor's degree",
'Grad/pro degree'))) +
geom_sf() +
scale_fill_viridis_c(direction = -1) +
scale_color_viridis_c(direction = -1) +
labs(title = "Educational Geography of the US",
caption = "Source: American Community Survey",
color = "Percent") +
theme_void() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))3 Visualizing the Relationship between Race and Education
race_data_state <- get_acs(geography = "state",
variables = racevars,
year = 2018,
geometry = TRUE,
summary_var = "B02001_001",
shift_geo = T)## Getting data from the 2014-2018 5-year ACS
## Using feature geometry obtained from the albersusa package
## Please note: Alaska and Hawaii are being shifted and are not to scale.
race_data_state <- race_data_state %>% as_tibble() %>%
select(-geometry, -GEOID, -summary_moe, -moe) %>%
rename(race = variable) %>%
mutate(race_percent = 100*(estimate/summary_est)) %>%
select(-summary_est, -estimate) %>%
mutate(race = as.factor(race))
ed_data_state <- get_acs(geography = "state",
variables = ed_variable,
year = 2018,
geometry = TRUE,
summary_var = "DP02_0059P",
shift_geo = T)## Getting data from the 2014-2018 5-year ACS
## Using feature geometry obtained from the albersusa package
## Using the ACS Data Profile
## Please note: Alaska and Hawaii are being shifted and are not to scale.
ed_data_state <- ed_data_state %>% as_tibble() %>%
select(-geometry, -GEOID, -summary_moe, -moe) %>%
rename(education_level = variable) %>%
select(-summary_est) %>%
mutate(education_level = case_when(education_level == "DP02_0059P" ~ 'Less than 9th Grade',
education_level == "DP02_0060P" ~ '9th to 12th grade, no diploma',
education_level == "DP02_0061P" ~ 'High school graduate',
education_level == "DP02_0062P" ~ 'Some college, no degree',
education_level == "DP02_0063P" ~ 'Associate\'s degree',
education_level == "DP02_0064P" ~ 'Bachelor\'s degree',
education_level == "DP02_0065P" ~ 'Grad/pro degree')) %>%
mutate(education_level = factor(education_level, levels = c('Less than 9th Grade',
'9th to 12th grade, no diploma',
'High school graduate',
'Some college, no degree',
"Associate's degree",
"Bachelor's degree",
'Grad/pro degree')))
race_education_state <- ed_data_state %>% left_join(race_data_state, by = c("NAME")) %>% rename(education_percent = estimate, state = NAME)
# Plot to go here3.1 Chord Diagram (Not Yet Finished)
library(chorddiag)
library(readxl)
race_education_data <- read_excel("/Users/dunk/Downloads/Book1.xlsx")
race_education_data_2 <- race_education_data %>% select(-`...1`) %>% mutate_if(is.numeric, .funs = funs(.*100))
test_matrix <- as.matrix(race_education_data_2)
colnames(test_matrix) <- c("All", "American Indian or Alaska Native", "Asian", "Black", "Hispanic",
"Native Hawaiian or other Pacific Islander", "White", "More than one race")
rownames(test_matrix) <- c("Less than high school", "High school graduate", "Some college but no degree",
"Associate degree", "Bachelor's degree", "Master's degree", "Professional degree",
"Doctoral degree")
groupColors <- c(RColorBrewer::brewer.pal(8, "Blues"))
chorddiag(test_matrix, type = "bipartite",
groupColors = groupColors,
groupnamePadding = 20, showTicks = F, categorynameFontsize = 0, groupnameFontsize = 12)